import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
df=pd.read_csv('C:\Datasets\hotel_bookings.csv')
df.head()
| hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | ... | deposit_type | agent | company | days_in_waiting_list | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Resort Hotel | 0 | 342 | 2015 | July | 27 | 1 | 0 | 0 | 2 | ... | No Deposit | NaN | NaN | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
| 1 | Resort Hotel | 0 | 737 | 2015 | July | 27 | 1 | 0 | 0 | 2 | ... | No Deposit | NaN | NaN | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
| 2 | Resort Hotel | 0 | 7 | 2015 | July | 27 | 1 | 0 | 1 | 1 | ... | No Deposit | NaN | NaN | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 |
| 3 | Resort Hotel | 0 | 13 | 2015 | July | 27 | 1 | 0 | 1 | 1 | ... | No Deposit | 304.0 | NaN | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 |
| 4 | Resort Hotel | 0 | 14 | 2015 | July | 27 | 1 | 0 | 2 | 2 | ... | No Deposit | 240.0 | NaN | 0 | Transient | 98.0 | 0 | 1 | Check-Out | 7/3/2015 |
5 rows × 32 columns
df.shape
(119390, 32)
df.columns
Index(['hotel', 'is_canceled', 'lead_time', 'arrival_date_year',
'arrival_date_month', 'arrival_date_week_number',
'arrival_date_day_of_month', 'stays_in_weekend_nights',
'stays_in_week_nights', 'adults', 'children', 'babies', 'meal',
'country', 'market_segment', 'distribution_channel',
'is_repeated_guest', 'previous_cancellations',
'previous_bookings_not_canceled', 'reserved_room_type',
'assigned_room_type', 'booking_changes', 'deposit_type', 'agent',
'company', 'days_in_waiting_list', 'customer_type', 'adr',
'required_car_parking_spaces', 'total_of_special_requests',
'reservation_status', 'reservation_status_date'],
dtype='object')
df.isnull().values.any()
True
df.isnull().sum()
hotel 0 is_canceled 0 lead_time 0 arrival_date_year 0 arrival_date_month 0 arrival_date_week_number 0 arrival_date_day_of_month 0 stays_in_weekend_nights 0 stays_in_week_nights 0 adults 0 children 4 babies 0 meal 0 country 488 market_segment 0 distribution_channel 0 is_repeated_guest 0 previous_cancellations 0 previous_bookings_not_canceled 0 reserved_room_type 0 assigned_room_type 0 booking_changes 0 deposit_type 0 agent 16340 company 112593 days_in_waiting_list 0 customer_type 0 adr 0 required_car_parking_spaces 0 total_of_special_requests 0 reservation_status 0 reservation_status_date 0 dtype: int64
# Replace missing values:
# agent: If no agency is given, booking was most likely made without one.
# company: If none given, it was most likely private.
# rest schould be self-explanatory.
df.fillna(0,inplace=True)
df.isnull().sum()
hotel 0 is_canceled 0 lead_time 0 arrival_date_year 0 arrival_date_month 0 arrival_date_week_number 0 arrival_date_day_of_month 0 stays_in_weekend_nights 0 stays_in_week_nights 0 adults 0 children 0 babies 0 meal 0 country 0 market_segment 0 distribution_channel 0 is_repeated_guest 0 previous_cancellations 0 previous_bookings_not_canceled 0 reserved_room_type 0 assigned_room_type 0 booking_changes 0 deposit_type 0 agent 0 company 0 days_in_waiting_list 0 customer_type 0 adr 0 required_car_parking_spaces 0 total_of_special_requests 0 reservation_status 0 reservation_status_date 0 dtype: int64
df['meal'].value_counts()
BB 92310 HB 14463 SC 10650 Undefined 1169 FB 798 Name: meal, dtype: int64
df['children'].unique()
array([ 0., 1., 2., 10., 3.])
df['adults'].unique()
array([ 2, 1, 3, 4, 40, 26, 50, 27, 55, 0, 20, 6, 5, 10],
dtype=int64)
df['babies'].unique()
array([ 0, 1, 2, 10, 9], dtype=int64)
### seems to have some dirtiness in data as Adults,babies & children cant be zero at a same time
df.shape
(119390, 32)
len(df[df['adults']==0])
403
filter=(df['children']==0) & (df['adults']==0) & (df['babies']==0)
df[filter]
| hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | ... | deposit_type | agent | company | days_in_waiting_list | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2224 | Resort Hotel | 0 | 1 | 2015 | October | 41 | 6 | 0 | 3 | 0 | ... | No Deposit | 0.0 | 174.0 | 0 | Transient-Party | 0.00 | 0 | 0 | Check-Out | 10/6/2015 |
| 2409 | Resort Hotel | 0 | 0 | 2015 | October | 42 | 12 | 0 | 0 | 0 | ... | No Deposit | 0.0 | 174.0 | 0 | Transient | 0.00 | 0 | 0 | Check-Out | 10/12/2015 |
| 3181 | Resort Hotel | 0 | 36 | 2015 | November | 47 | 20 | 1 | 2 | 0 | ... | No Deposit | 38.0 | 0.0 | 0 | Transient-Party | 0.00 | 0 | 0 | Check-Out | 11/23/2015 |
| 3684 | Resort Hotel | 0 | 165 | 2015 | December | 53 | 30 | 1 | 4 | 0 | ... | No Deposit | 308.0 | 0.0 | 122 | Transient-Party | 0.00 | 0 | 0 | Check-Out | 1/4/2016 |
| 3708 | Resort Hotel | 0 | 165 | 2015 | December | 53 | 30 | 2 | 4 | 0 | ... | No Deposit | 308.0 | 0.0 | 122 | Transient-Party | 0.00 | 0 | 0 | Check-Out | 1/5/2016 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 115029 | City Hotel | 0 | 107 | 2017 | June | 26 | 27 | 0 | 3 | 0 | ... | No Deposit | 7.0 | 0.0 | 0 | Transient | 100.80 | 0 | 0 | Check-Out | 6/30/2017 |
| 115091 | City Hotel | 0 | 1 | 2017 | June | 26 | 30 | 0 | 1 | 0 | ... | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.00 | 1 | 1 | Check-Out | 7/1/2017 |
| 116251 | City Hotel | 0 | 44 | 2017 | July | 28 | 15 | 1 | 1 | 0 | ... | No Deposit | 425.0 | 0.0 | 0 | Transient | 73.80 | 0 | 0 | Check-Out | 7/17/2017 |
| 116534 | City Hotel | 0 | 2 | 2017 | July | 28 | 15 | 2 | 5 | 0 | ... | No Deposit | 9.0 | 0.0 | 0 | Transient-Party | 22.86 | 0 | 1 | Check-Out | 7/22/2017 |
| 117087 | City Hotel | 0 | 170 | 2017 | July | 30 | 27 | 0 | 2 | 0 | ... | No Deposit | 52.0 | 0.0 | 0 | Transient | 0.00 | 0 | 0 | Check-Out | 7/29/2017 |
180 rows × 32 columns
### Visualise Entire Dataframe where adult,children & babies are 0
pd.set_option('display.max_columns',32)
filter=(df['children']==0) & (df['adults']==0) & (df['babies']==0)
df[filter]
| hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | children | babies | meal | country | market_segment | distribution_channel | is_repeated_guest | previous_cancellations | previous_bookings_not_canceled | reserved_room_type | assigned_room_type | booking_changes | deposit_type | agent | company | days_in_waiting_list | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2224 | Resort Hotel | 0 | 1 | 2015 | October | 41 | 6 | 0 | 3 | 0 | 0.0 | 0 | SC | PRT | Corporate | Corporate | 0 | 0 | 0 | A | I | 1 | No Deposit | 0.0 | 174.0 | 0 | Transient-Party | 0.00 | 0 | 0 | Check-Out | 10/6/2015 |
| 2409 | Resort Hotel | 0 | 0 | 2015 | October | 42 | 12 | 0 | 0 | 0 | 0.0 | 0 | SC | PRT | Corporate | Corporate | 0 | 0 | 0 | A | I | 0 | No Deposit | 0.0 | 174.0 | 0 | Transient | 0.00 | 0 | 0 | Check-Out | 10/12/2015 |
| 3181 | Resort Hotel | 0 | 36 | 2015 | November | 47 | 20 | 1 | 2 | 0 | 0.0 | 0 | SC | ESP | Groups | TA/TO | 0 | 0 | 0 | A | C | 0 | No Deposit | 38.0 | 0.0 | 0 | Transient-Party | 0.00 | 0 | 0 | Check-Out | 11/23/2015 |
| 3684 | Resort Hotel | 0 | 165 | 2015 | December | 53 | 30 | 1 | 4 | 0 | 0.0 | 0 | SC | PRT | Groups | TA/TO | 0 | 0 | 0 | A | A | 1 | No Deposit | 308.0 | 0.0 | 122 | Transient-Party | 0.00 | 0 | 0 | Check-Out | 1/4/2016 |
| 3708 | Resort Hotel | 0 | 165 | 2015 | December | 53 | 30 | 2 | 4 | 0 | 0.0 | 0 | SC | PRT | Groups | TA/TO | 0 | 0 | 0 | A | C | 1 | No Deposit | 308.0 | 0.0 | 122 | Transient-Party | 0.00 | 0 | 0 | Check-Out | 1/5/2016 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 115029 | City Hotel | 0 | 107 | 2017 | June | 26 | 27 | 0 | 3 | 0 | 0.0 | 0 | BB | CHE | Online TA | TA/TO | 0 | 0 | 0 | A | A | 1 | No Deposit | 7.0 | 0.0 | 0 | Transient | 100.80 | 0 | 0 | Check-Out | 6/30/2017 |
| 115091 | City Hotel | 0 | 1 | 2017 | June | 26 | 30 | 0 | 1 | 0 | 0.0 | 0 | SC | PRT | Complementary | Direct | 0 | 0 | 0 | E | K | 0 | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.00 | 1 | 1 | Check-Out | 7/1/2017 |
| 116251 | City Hotel | 0 | 44 | 2017 | July | 28 | 15 | 1 | 1 | 0 | 0.0 | 0 | SC | SWE | Online TA | TA/TO | 0 | 0 | 0 | A | K | 2 | No Deposit | 425.0 | 0.0 | 0 | Transient | 73.80 | 0 | 0 | Check-Out | 7/17/2017 |
| 116534 | City Hotel | 0 | 2 | 2017 | July | 28 | 15 | 2 | 5 | 0 | 0.0 | 0 | SC | RUS | Online TA | TA/TO | 0 | 0 | 0 | A | K | 1 | No Deposit | 9.0 | 0.0 | 0 | Transient-Party | 22.86 | 0 | 1 | Check-Out | 7/22/2017 |
| 117087 | City Hotel | 0 | 170 | 2017 | July | 30 | 27 | 0 | 2 | 0 | 0.0 | 0 | BB | BRA | Offline TA/TO | TA/TO | 0 | 0 | 0 | A | A | 0 | No Deposit | 52.0 | 0.0 | 0 | Transient | 0.00 | 0 | 0 | Check-Out | 7/29/2017 |
180 rows × 32 columns
data=df[~filter]
data.shape
(119210, 32)
data.head()
| hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | children | babies | meal | country | market_segment | distribution_channel | is_repeated_guest | previous_cancellations | previous_bookings_not_canceled | reserved_room_type | assigned_room_type | booking_changes | deposit_type | agent | company | days_in_waiting_list | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Resort Hotel | 0 | 342 | 2015 | July | 27 | 1 | 0 | 0 | 2 | 0.0 | 0 | BB | PRT | Direct | Direct | 0 | 0 | 0 | C | C | 3 | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
| 1 | Resort Hotel | 0 | 737 | 2015 | July | 27 | 1 | 0 | 0 | 2 | 0.0 | 0 | BB | PRT | Direct | Direct | 0 | 0 | 0 | C | C | 4 | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
| 2 | Resort Hotel | 0 | 7 | 2015 | July | 27 | 1 | 0 | 1 | 1 | 0.0 | 0 | BB | GBR | Direct | Direct | 0 | 0 | 0 | A | C | 0 | No Deposit | 0.0 | 0.0 | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 |
| 3 | Resort Hotel | 0 | 13 | 2015 | July | 27 | 1 | 0 | 1 | 1 | 0.0 | 0 | BB | GBR | Corporate | Corporate | 0 | 0 | 0 | A | A | 0 | No Deposit | 304.0 | 0.0 | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 |
| 4 | Resort Hotel | 0 | 14 | 2015 | July | 27 | 1 | 0 | 2 | 2 | 0.0 | 0 | BB | GBR | Online TA | TA/TO | 0 | 0 | 0 | A | A | 0 | No Deposit | 240.0 | 0.0 | 0 | Transient | 98.0 | 0 | 1 | Check-Out | 7/3/2015 |
resort = data[(data["hotel"] == "Resort Hotel") & (data["is_canceled"] == 0)]
city = data[(data["hotel"] == "City Hotel") & (data["is_canceled"] == 0)]
resort.shape
(28927, 32)
city.shape
(46084, 32)
resort.head()
| hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | children | babies | meal | country | market_segment | distribution_channel | is_repeated_guest | previous_cancellations | previous_bookings_not_canceled | reserved_room_type | assigned_room_type | booking_changes | deposit_type | agent | company | days_in_waiting_list | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Resort Hotel | 0 | 342 | 2015 | July | 27 | 1 | 0 | 0 | 2 | 0.0 | 0 | BB | PRT | Direct | Direct | 0 | 0 | 0 | C | C | 3 | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
| 1 | Resort Hotel | 0 | 737 | 2015 | July | 27 | 1 | 0 | 0 | 2 | 0.0 | 0 | BB | PRT | Direct | Direct | 0 | 0 | 0 | C | C | 4 | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
| 2 | Resort Hotel | 0 | 7 | 2015 | July | 27 | 1 | 0 | 1 | 1 | 0.0 | 0 | BB | GBR | Direct | Direct | 0 | 0 | 0 | A | C | 0 | No Deposit | 0.0 | 0.0 | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 |
| 3 | Resort Hotel | 0 | 13 | 2015 | July | 27 | 1 | 0 | 1 | 1 | 0.0 | 0 | BB | GBR | Corporate | Corporate | 0 | 0 | 0 | A | A | 0 | No Deposit | 304.0 | 0.0 | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 |
| 4 | Resort Hotel | 0 | 14 | 2015 | July | 27 | 1 | 0 | 2 | 2 | 0.0 | 0 | BB | GBR | Online TA | TA/TO | 0 | 0 | 0 | A | A | 0 | No Deposit | 240.0 | 0.0 | 0 | Transient | 98.0 | 0 | 1 | Check-Out | 7/3/2015 |
labels=resort['country'].value_counts().index
values=resort['country'].value_counts()
import plotly.graph_objs as go
from plotly.offline import iplot
import plotly.express as px
country_data=data[data['is_canceled']==0]['country'].value_counts().reset_index()
country_data.columns=['country','No of guests']
country_data
| country | No of guests | |
|---|---|---|
| 0 | PRT | 20977 |
| 1 | GBR | 9668 |
| 2 | FRA | 8468 |
| 3 | ESP | 6383 |
| 4 | DEU | 6067 |
| ... | ... | ... |
| 161 | DJI | 1 |
| 162 | NPL | 1 |
| 163 | PLW | 1 |
| 164 | BWA | 1 |
| 165 | BFA | 1 |
166 rows × 2 columns
import folium
from folium.plugins import HeatMap
basemap=folium.Map()
country_data.dtypes
country object No of guests int64 dtype: object
# show on map
map_guest = px.choropleth(country_data,
locations=country_data['country'],
color=country_data['No of guests'],
hover_name=country_data['country'],
title="Home country of guests")
map_guest.show()
data.head()
| hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | children | babies | meal | country | market_segment | distribution_channel | is_repeated_guest | previous_cancellations | previous_bookings_not_canceled | reserved_room_type | assigned_room_type | booking_changes | deposit_type | agent | company | days_in_waiting_list | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Resort Hotel | 0 | 342 | 2015 | July | 27 | 1 | 0 | 0 | 2 | 0.0 | 0 | BB | PRT | Direct | Direct | 0 | 0 | 0 | C | C | 3 | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
| 1 | Resort Hotel | 0 | 737 | 2015 | July | 27 | 1 | 0 | 0 | 2 | 0.0 | 0 | BB | PRT | Direct | Direct | 0 | 0 | 0 | C | C | 4 | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
| 2 | Resort Hotel | 0 | 7 | 2015 | July | 27 | 1 | 0 | 1 | 1 | 0.0 | 0 | BB | GBR | Direct | Direct | 0 | 0 | 0 | A | C | 0 | No Deposit | 0.0 | 0.0 | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 |
| 3 | Resort Hotel | 0 | 13 | 2015 | July | 27 | 1 | 0 | 1 | 1 | 0.0 | 0 | BB | GBR | Corporate | Corporate | 0 | 0 | 0 | A | A | 0 | No Deposit | 304.0 | 0.0 | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 |
| 4 | Resort Hotel | 0 | 14 | 2015 | July | 27 | 1 | 0 | 2 | 2 | 0.0 | 0 | BB | GBR | Online TA | TA/TO | 0 | 0 | 0 | A | A | 0 | No Deposit | 240.0 | 0.0 | 0 | Transient | 98.0 | 0 | 1 | Check-Out | 7/3/2015 |
data2=data[data['is_canceled']==0]
# boxplot:
plt.figure(figsize=(12, 8))
sns.boxplot(x="reserved_room_type",
y="adr",
hue="hotel",
data=data2)
plt.title("Price of room types per night and person", fontsize=16)
plt.xlabel("Room type", fontsize=16)
plt.ylabel("Price [EUR]", fontsize=16)
plt.legend(loc="upper right")
plt.ylim(0, 600)
plt.show()
data_resort=resort[resort['is_canceled']==0]
data_city=city[city['is_canceled']==0]
data_resort.head()
| hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | children | babies | meal | country | market_segment | distribution_channel | is_repeated_guest | previous_cancellations | previous_bookings_not_canceled | reserved_room_type | assigned_room_type | booking_changes | deposit_type | agent | company | days_in_waiting_list | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Resort Hotel | 0 | 342 | 2015 | July | 27 | 1 | 0 | 0 | 2 | 0.0 | 0 | BB | PRT | Direct | Direct | 0 | 0 | 0 | C | C | 3 | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
| 1 | Resort Hotel | 0 | 737 | 2015 | July | 27 | 1 | 0 | 0 | 2 | 0.0 | 0 | BB | PRT | Direct | Direct | 0 | 0 | 0 | C | C | 4 | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
| 2 | Resort Hotel | 0 | 7 | 2015 | July | 27 | 1 | 0 | 1 | 1 | 0.0 | 0 | BB | GBR | Direct | Direct | 0 | 0 | 0 | A | C | 0 | No Deposit | 0.0 | 0.0 | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 |
| 3 | Resort Hotel | 0 | 13 | 2015 | July | 27 | 1 | 0 | 1 | 1 | 0.0 | 0 | BB | GBR | Corporate | Corporate | 0 | 0 | 0 | A | A | 0 | No Deposit | 304.0 | 0.0 | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 |
| 4 | Resort Hotel | 0 | 14 | 2015 | July | 27 | 1 | 0 | 2 | 2 | 0.0 | 0 | BB | GBR | Online TA | TA/TO | 0 | 0 | 0 | A | A | 0 | No Deposit | 240.0 | 0.0 | 0 | Transient | 98.0 | 0 | 1 | Check-Out | 7/3/2015 |
resort_hotel=data_resort.groupby(['arrival_date_month'])['adr'].mean().reset_index()
resort_hotel
| arrival_date_month | adr | |
|---|---|---|
| 0 | April | 75.867816 |
| 1 | August | 181.205892 |
| 2 | December | 68.410104 |
| 3 | February | 54.147478 |
| 4 | January | 48.761125 |
| 5 | July | 150.122528 |
| 6 | June | 107.974850 |
| 7 | March | 57.056838 |
| 8 | May | 76.657558 |
| 9 | November | 48.706289 |
| 10 | October | 61.775449 |
| 11 | September | 96.416860 |
city_hotel=data_city.groupby(['arrival_date_month'])['adr'].mean().reset_index()
city_hotel
| arrival_date_month | adr | |
|---|---|---|
| 0 | April | 111.962267 |
| 1 | August | 118.674598 |
| 2 | December | 88.401855 |
| 3 | February | 86.520062 |
| 4 | January | 82.330983 |
| 5 | July | 115.818019 |
| 6 | June | 117.874360 |
| 7 | March | 90.658533 |
| 8 | May | 120.669827 |
| 9 | November | 86.946592 |
| 10 | October | 102.004672 |
| 11 | September | 112.776582 |
final=resort_hotel.merge(city_hotel,on='arrival_date_month')
final.columns=['month','price_for_resort','price_for_city_hotel']
final
| month | price_for_resort | price_for_city_hotel | |
|---|---|---|---|
| 0 | April | 75.867816 | 111.962267 |
| 1 | August | 181.205892 | 118.674598 |
| 2 | December | 68.410104 | 88.401855 |
| 3 | February | 54.147478 | 86.520062 |
| 4 | January | 48.761125 | 82.330983 |
| 5 | July | 150.122528 | 115.818019 |
| 6 | June | 107.974850 | 117.874360 |
| 7 | March | 57.056838 | 90.658533 |
| 8 | May | 76.657558 | 120.669827 |
| 9 | November | 48.706289 | 86.946592 |
| 10 | October | 61.775449 | 102.004672 |
| 11 | September | 96.416860 | 112.776582 |
import sort_dataframeby_monthorweek as sd
final=sd.Sort_Dataframeby_Month(df=final,monthcolumnname='month')
final
| month | price_for_resort | price_for_city_hotel | |
|---|---|---|---|
| 0 | January | 48.761125 | 82.330983 |
| 1 | February | 54.147478 | 86.520062 |
| 2 | March | 57.056838 | 90.658533 |
| 3 | April | 75.867816 | 111.962267 |
| 4 | May | 76.657558 | 120.669827 |
| 5 | June | 107.974850 | 117.874360 |
| 6 | July | 150.122528 | 115.818019 |
| 7 | August | 181.205892 | 118.674598 |
| 8 | September | 96.416860 | 112.776582 |
| 9 | October | 61.775449 | 102.004672 |
| 10 | November | 48.706289 | 86.946592 |
| 11 | December | 68.410104 | 88.401855 |
px.line(final, x='month', y=['price_for_resort','price_for_city_hotel'], title='Room price per night over the Months')
This clearly shows that the prices in the Resort hotel are much higher during the summer (no surprise here). The price of the city hotel varies less and is most expensive during spring and autumn.
data.head()
| hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | children | babies | meal | country | market_segment | distribution_channel | is_repeated_guest | previous_cancellations | previous_bookings_not_canceled | reserved_room_type | assigned_room_type | booking_changes | deposit_type | agent | company | days_in_waiting_list | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Resort Hotel | 0 | 342 | 2015 | July | 27 | 1 | 0 | 0 | 2 | 0.0 | 0 | BB | PRT | Direct | Direct | 0 | 0 | 0 | C | C | 3 | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
| 1 | Resort Hotel | 0 | 737 | 2015 | July | 27 | 1 | 0 | 0 | 2 | 0.0 | 0 | BB | PRT | Direct | Direct | 0 | 0 | 0 | C | C | 4 | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
| 2 | Resort Hotel | 0 | 7 | 2015 | July | 27 | 1 | 0 | 1 | 1 | 0.0 | 0 | BB | GBR | Direct | Direct | 0 | 0 | 0 | A | C | 0 | No Deposit | 0.0 | 0.0 | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 |
| 3 | Resort Hotel | 0 | 13 | 2015 | July | 27 | 1 | 0 | 1 | 1 | 0.0 | 0 | BB | GBR | Corporate | Corporate | 0 | 0 | 0 | A | A | 0 | No Deposit | 304.0 | 0.0 | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 |
| 4 | Resort Hotel | 0 | 14 | 2015 | July | 27 | 1 | 0 | 2 | 2 | 0.0 | 0 | BB | GBR | Online TA | TA/TO | 0 | 0 | 0 | A | A | 0 | No Deposit | 240.0 | 0.0 | 0 | Transient | 98.0 | 0 | 1 | Check-Out | 7/3/2015 |
plt.figure(figsize = (15,10))
sns.boxplot(x = "market_segment", y = "stays_in_week_nights", data = data, hue = "hotel", palette = 'Set1');
It can be seen that most of the groups are normal distributed, some of them have high skewness. Looking at the distribution, most people do not seem to prefer to stay at the hotel for more than 1 week. But it seems normal to stay in resort hotels for up to 12-13 days. It is obvious that when people go to resort hotels, they prefer to stay more.
px.pie(data,names=data['meal'].value_counts().index,values=data['meal'].value_counts().values,hole=0.5)
The donut pie graph shows the meal categories. There is a big difference in the Bed & Breakfast category and the others. Almost 80% of bookings reserved for Bed&Breakfast.
plt.figure(figsize=(8,5))
sns.countplot(x='total_of_special_requests', data=data, palette = 'ocean_r')
plt.title('Total Special Request')
plt.show()
Around 55% of bookings do not have any special requests
plt.figure(figsize=(8,5))
sns.countplot(x='total_of_special_requests', data=data,hue='hotel')
plt.title('Total Special Request')
plt.show()
City hotel customers have more special requests.
data.head()
| hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | children | babies | meal | country | market_segment | distribution_channel | is_repeated_guest | previous_cancellations | previous_bookings_not_canceled | reserved_room_type | assigned_room_type | booking_changes | deposit_type | agent | company | days_in_waiting_list | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Resort Hotel | 0 | 342 | 2015 | July | 27 | 1 | 0 | 0 | 2 | 0.0 | 0 | BB | PRT | Direct | Direct | 0 | 0 | 0 | C | C | 3 | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
| 1 | Resort Hotel | 0 | 737 | 2015 | July | 27 | 1 | 0 | 0 | 2 | 0.0 | 0 | BB | PRT | Direct | Direct | 0 | 0 | 0 | C | C | 4 | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
| 2 | Resort Hotel | 0 | 7 | 2015 | July | 27 | 1 | 0 | 1 | 1 | 0.0 | 0 | BB | GBR | Direct | Direct | 0 | 0 | 0 | A | C | 0 | No Deposit | 0.0 | 0.0 | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 |
| 3 | Resort Hotel | 0 | 13 | 2015 | July | 27 | 1 | 0 | 1 | 1 | 0.0 | 0 | BB | GBR | Corporate | Corporate | 0 | 0 | 0 | A | A | 0 | No Deposit | 304.0 | 0.0 | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 |
| 4 | Resort Hotel | 0 | 14 | 2015 | July | 27 | 1 | 0 | 2 | 2 | 0.0 | 0 | BB | GBR | Online TA | TA/TO | 0 | 0 | 0 | A | A | 0 | No Deposit | 240.0 | 0.0 | 0 | Transient | 98.0 | 0 | 1 | Check-Out | 7/3/2015 |
pivot=data.groupby([ 'total_of_special_requests', 'is_canceled']).agg({'total_of_special_requests':'count'}).rename(columns={'total_of_special_requests':'count'}).unstack()
pivot
| count | ||
|---|---|---|
| is_canceled | 0 | 1 |
| total_of_special_requests | ||
| 0 | 36667 | 33534 |
| 1 | 25867 | 7316 |
| 2 | 10086 | 2866 |
| 3 | 2049 | 445 |
| 4 | 304 | 36 |
| 5 | 38 | 2 |
pivot.plot(kind='bar')
plt.show()
This graph is about the relationship between special requests and cancellation booking status. Nearly half bookings without any special requests have been cancelled and another half of them have not been canceled. We can also see that the more special requests the customers have the smaller the chances of cancellation.
data_resort.head()
| hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | children | babies | meal | country | market_segment | distribution_channel | is_repeated_guest | previous_cancellations | previous_bookings_not_canceled | reserved_room_type | assigned_room_type | booking_changes | deposit_type | agent | company | days_in_waiting_list | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Resort Hotel | 0 | 342 | 2015 | July | 27 | 1 | 0 | 0 | 2 | 0.0 | 0 | BB | PRT | Direct | Direct | 0 | 0 | 0 | C | C | 3 | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
| 1 | Resort Hotel | 0 | 737 | 2015 | July | 27 | 1 | 0 | 0 | 2 | 0.0 | 0 | BB | PRT | Direct | Direct | 0 | 0 | 0 | C | C | 4 | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
| 2 | Resort Hotel | 0 | 7 | 2015 | July | 27 | 1 | 0 | 1 | 1 | 0.0 | 0 | BB | GBR | Direct | Direct | 0 | 0 | 0 | A | C | 0 | No Deposit | 0.0 | 0.0 | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 |
| 3 | Resort Hotel | 0 | 13 | 2015 | July | 27 | 1 | 0 | 1 | 1 | 0.0 | 0 | BB | GBR | Corporate | Corporate | 0 | 0 | 0 | A | A | 0 | No Deposit | 304.0 | 0.0 | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 |
| 4 | Resort Hotel | 0 | 14 | 2015 | July | 27 | 1 | 0 | 2 | 2 | 0.0 | 0 | BB | GBR | Online TA | TA/TO | 0 | 0 | 0 | A | A | 0 | No Deposit | 240.0 | 0.0 | 0 | Transient | 98.0 | 0 | 1 | Check-Out | 7/3/2015 |
rush_resort=data_resort['arrival_date_month'].value_counts().reset_index()
rush_resort.columns=['month','no of guests']
rush_resort
| month | no of guests | |
|---|---|---|
| 0 | August | 3257 |
| 1 | July | 3137 |
| 2 | October | 2575 |
| 3 | March | 2571 |
| 4 | April | 2550 |
| 5 | May | 2535 |
| 6 | February | 2308 |
| 7 | September | 2102 |
| 8 | June | 2037 |
| 9 | December | 2014 |
| 10 | November | 1975 |
| 11 | January | 1866 |
rush_city=data_city['arrival_date_month'].value_counts().reset_index()
rush_city.columns=['month','no of guests']
rush_city
| month | no of guests | |
|---|---|---|
| 0 | August | 5367 |
| 1 | July | 4770 |
| 2 | May | 4568 |
| 3 | June | 4358 |
| 4 | October | 4326 |
| 5 | September | 4283 |
| 6 | March | 4049 |
| 7 | April | 4010 |
| 8 | February | 3051 |
| 9 | November | 2676 |
| 10 | December | 2377 |
| 11 | January | 2249 |
final_rush=rush_resort.merge(rush_city,on='month')
final_rush.columns=['month','no of guests in resort','no of guest in city hotel']
final_rush
| month | no of guests in resort | no of guest in city hotel | |
|---|---|---|---|
| 0 | August | 3257 | 5367 |
| 1 | July | 3137 | 4770 |
| 2 | October | 2575 | 4326 |
| 3 | March | 2571 | 4049 |
| 4 | April | 2550 | 4010 |
| 5 | May | 2535 | 4568 |
| 6 | February | 2308 | 3051 |
| 7 | September | 2102 | 4283 |
| 8 | June | 2037 | 4358 |
| 9 | December | 2014 | 2377 |
| 10 | November | 1975 | 2676 |
| 11 | January | 1866 | 2249 |
final_rush=sd.Sort_Dataframeby_Month(df=final_rush,monthcolumnname='month')
final_rush
| month | no of guests in resort | no of guest in city hotel | |
|---|---|---|---|
| 0 | January | 1866 | 2249 |
| 1 | February | 2308 | 3051 |
| 2 | March | 2571 | 4049 |
| 3 | April | 2550 | 4010 |
| 4 | May | 2535 | 4568 |
| 5 | June | 2037 | 4358 |
| 6 | July | 3137 | 4770 |
| 7 | August | 3257 | 5367 |
| 8 | September | 2102 | 4283 |
| 9 | October | 2575 | 4326 |
| 10 | November | 1975 | 2676 |
| 11 | December | 2014 | 2377 |
final_rush.dtypes
month object no of guests in resort int64 no of guest in city hotel int64 dtype: object
final_rush.columns
Index(['month', 'no of guests in resort', 'no of guest in city hotel'], dtype='object')
px.line(data_frame=final_rush, x='month', y=['no of guests in resort','no of guest in city hotel'], title='Total no of guests per Month')
The City hotel has more guests during spring and autumn, when the prices are also highest.
In July and August there are less visitors, although prices are lower.
Guest numbers for the Resort hotel go down slighty from June to September, which is also when the prices are highest.
Both hotels have the fewest guests during the winter.
filter=data['is_canceled']==0
clean_data=data[filter]
clean_data.head()
| hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | children | babies | meal | country | market_segment | distribution_channel | is_repeated_guest | previous_cancellations | previous_bookings_not_canceled | reserved_room_type | assigned_room_type | booking_changes | deposit_type | agent | company | days_in_waiting_list | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Resort Hotel | 0 | 342 | 2015 | July | 27 | 1 | 0 | 0 | 2 | 0.0 | 0 | BB | PRT | Direct | Direct | 0 | 0 | 0 | C | C | 3 | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
| 1 | Resort Hotel | 0 | 737 | 2015 | July | 27 | 1 | 0 | 0 | 2 | 0.0 | 0 | BB | PRT | Direct | Direct | 0 | 0 | 0 | C | C | 4 | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
| 2 | Resort Hotel | 0 | 7 | 2015 | July | 27 | 1 | 0 | 1 | 1 | 0.0 | 0 | BB | GBR | Direct | Direct | 0 | 0 | 0 | A | C | 0 | No Deposit | 0.0 | 0.0 | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 |
| 3 | Resort Hotel | 0 | 13 | 2015 | July | 27 | 1 | 0 | 1 | 1 | 0.0 | 0 | BB | GBR | Corporate | Corporate | 0 | 0 | 0 | A | A | 0 | No Deposit | 304.0 | 0.0 | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 |
| 4 | Resort Hotel | 0 | 14 | 2015 | July | 27 | 1 | 0 | 2 | 2 | 0.0 | 0 | BB | GBR | Online TA | TA/TO | 0 | 0 | 0 | A | A | 0 | No Deposit | 240.0 | 0.0 | 0 | Transient | 98.0 | 0 | 1 | Check-Out | 7/3/2015 |
clean_data["total_nights"] = clean_data["stays_in_weekend_nights"] + clean_data["stays_in_week_nights"]
c:\users\sl149\appdata\local\programs\python\python36\lib\site-packages\ipykernel_launcher.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
clean_data.head()
| hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | children | babies | meal | country | market_segment | distribution_channel | ... | previous_cancellations | previous_bookings_not_canceled | reserved_room_type | assigned_room_type | booking_changes | deposit_type | agent | company | days_in_waiting_list | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date | total_nights | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Resort Hotel | 0 | 342 | 2015 | July | 27 | 1 | 0 | 0 | 2 | 0.0 | 0 | BB | PRT | Direct | Direct | ... | 0 | 0 | C | C | 3 | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 | 0 |
| 1 | Resort Hotel | 0 | 737 | 2015 | July | 27 | 1 | 0 | 0 | 2 | 0.0 | 0 | BB | PRT | Direct | Direct | ... | 0 | 0 | C | C | 4 | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 | 0 |
| 2 | Resort Hotel | 0 | 7 | 2015 | July | 27 | 1 | 0 | 1 | 1 | 0.0 | 0 | BB | GBR | Direct | Direct | ... | 0 | 0 | A | C | 0 | No Deposit | 0.0 | 0.0 | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 | 1 |
| 3 | Resort Hotel | 0 | 13 | 2015 | July | 27 | 1 | 0 | 1 | 1 | 0.0 | 0 | BB | GBR | Corporate | Corporate | ... | 0 | 0 | A | A | 0 | No Deposit | 304.0 | 0.0 | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 | 1 |
| 4 | Resort Hotel | 0 | 14 | 2015 | July | 27 | 1 | 0 | 2 | 2 | 0.0 | 0 | BB | GBR | Online TA | TA/TO | ... | 0 | 0 | A | A | 0 | No Deposit | 240.0 | 0.0 | 0 | Transient | 98.0 | 0 | 1 | Check-Out | 7/3/2015 | 2 |
5 rows × 33 columns
stay=clean_data.groupby(['total_nights','hotel']).agg('count').reset_index()
stay=stay.iloc[:,0:3]
stay.head()
| total_nights | hotel | is_canceled | |
|---|---|---|---|
| 0 | 0 | City Hotel | 251 |
| 1 | 0 | Resort Hotel | 371 |
| 2 | 1 | City Hotel | 9155 |
| 3 | 1 | Resort Hotel | 6579 |
| 4 | 2 | City Hotel | 10983 |
stay=stay.rename(columns={'is_canceled':'Number of stays'})
stay.head()
| total_nights | hotel | Number of stays | |
|---|---|---|---|
| 0 | 0 | City Hotel | 251 |
| 1 | 0 | Resort Hotel | 371 |
| 2 | 1 | City Hotel | 9155 |
| 3 | 1 | Resort Hotel | 6579 |
| 4 | 2 | City Hotel | 10983 |
plt.figure(figsize=(20, 8))
sns.barplot(x = "total_nights", y = "Number of stays" , hue="hotel",
hue_order = ["City Hotel", "Resort Hotel"], data=stay)
plt.show()
As we observed before. Resort Hotel customers tend to stay more nights then City Hotel customers.
clean_data['market_segment'].value_counts()
Online TA 35673 Offline TA/TO 15880 Direct 10648 Groups 7697 Corporate 4291 Complementary 639 Aviation 183 Name: market_segment, dtype: int64
# pie plot
fig=px.pie(clean_data,
values=clean_data['market_segment'].value_counts().values,
names=clean_data['market_segment'].value_counts().index,
title="Bookings per market segment")
fig.update_traces(rotation=-90, textinfo="percent+label")
fig.show()
cancel=data[data['is_canceled']==1]
rh_cancelations = cancel[cancel["hotel"] == "Resort Hotel"]["is_canceled"].sum()
ch_cancelations = cancel[cancel["hotel"] == "City Hotel"]["is_canceled"].sum()
rh_cancelations
11120
ch_cancelations
33079
### convert entire stats into percentage
px.pie(values=[11120,33079],names=[rh_cancelations,ch_cancelations])
data.head()
| hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | children | babies | meal | country | market_segment | distribution_channel | is_repeated_guest | previous_cancellations | previous_bookings_not_canceled | reserved_room_type | assigned_room_type | booking_changes | deposit_type | agent | company | days_in_waiting_list | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Resort Hotel | 0 | 342 | 2015 | July | 27 | 1 | 0 | 0 | 2 | 0.0 | 0 | BB | PRT | Direct | Direct | 0 | 0 | 0 | C | C | 3 | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
| 1 | Resort Hotel | 0 | 737 | 2015 | July | 27 | 1 | 0 | 0 | 2 | 0.0 | 0 | BB | PRT | Direct | Direct | 0 | 0 | 0 | C | C | 4 | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
| 2 | Resort Hotel | 0 | 7 | 2015 | July | 27 | 1 | 0 | 1 | 1 | 0.0 | 0 | BB | GBR | Direct | Direct | 0 | 0 | 0 | A | C | 0 | No Deposit | 0.0 | 0.0 | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 |
| 3 | Resort Hotel | 0 | 13 | 2015 | July | 27 | 1 | 0 | 1 | 1 | 0.0 | 0 | BB | GBR | Corporate | Corporate | 0 | 0 | 0 | A | A | 0 | No Deposit | 304.0 | 0.0 | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 |
| 4 | Resort Hotel | 0 | 14 | 2015 | July | 27 | 1 | 0 | 2 | 2 | 0.0 | 0 | BB | GBR | Online TA | TA/TO | 0 | 0 | 0 | A | A | 0 | No Deposit | 240.0 | 0.0 | 0 | Transient | 98.0 | 0 | 1 | Check-Out | 7/3/2015 |
cancellation=data[data['is_canceled']==1]
cancellation.head()
| hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | children | babies | meal | country | market_segment | distribution_channel | is_repeated_guest | previous_cancellations | previous_bookings_not_canceled | reserved_room_type | assigned_room_type | booking_changes | deposit_type | agent | company | days_in_waiting_list | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 8 | Resort Hotel | 1 | 85 | 2015 | July | 27 | 1 | 0 | 3 | 2 | 0.0 | 0 | BB | PRT | Online TA | TA/TO | 0 | 0 | 0 | A | A | 0 | No Deposit | 240.0 | 0.0 | 0 | Transient | 82.0 | 0 | 1 | Canceled | 5/6/2015 |
| 9 | Resort Hotel | 1 | 75 | 2015 | July | 27 | 1 | 0 | 3 | 2 | 0.0 | 0 | HB | PRT | Offline TA/TO | TA/TO | 0 | 0 | 0 | D | D | 0 | No Deposit | 15.0 | 0.0 | 0 | Transient | 105.5 | 0 | 0 | Canceled | 4/22/2015 |
| 10 | Resort Hotel | 1 | 23 | 2015 | July | 27 | 1 | 0 | 4 | 2 | 0.0 | 0 | BB | PRT | Online TA | TA/TO | 0 | 0 | 0 | E | E | 0 | No Deposit | 240.0 | 0.0 | 0 | Transient | 123.0 | 0 | 0 | Canceled | 6/23/2015 |
| 27 | Resort Hotel | 1 | 60 | 2015 | July | 27 | 1 | 2 | 5 | 2 | 0.0 | 0 | BB | PRT | Online TA | TA/TO | 0 | 0 | 0 | E | E | 0 | No Deposit | 240.0 | 0.0 | 0 | Transient | 107.0 | 0 | 2 | Canceled | 5/11/2015 |
| 32 | Resort Hotel | 1 | 96 | 2015 | July | 27 | 1 | 2 | 8 | 2 | 0.0 | 0 | BB | PRT | Direct | Direct | 0 | 0 | 0 | E | E | 0 | No Deposit | 0.0 | 0.0 | 0 | Transient | 108.3 | 0 | 2 | Canceled | 5/29/2015 |
cancellation['hotel'].unique()
array(['Resort Hotel', 'City Hotel'], dtype=object)
cancel_month=data.groupby(['arrival_date_month','hotel']).agg('count').reset_index()
cancelled=cancel_month.iloc[:,0:3]
cancelled
| arrival_date_month | hotel | is_canceled | |
|---|---|---|---|
| 0 | April | City Hotel | 7469 |
| 1 | April | Resort Hotel | 3609 |
| 2 | August | City Hotel | 8967 |
| 3 | August | Resort Hotel | 4894 |
| 4 | December | City Hotel | 4114 |
| 5 | December | Resort Hotel | 2645 |
| 6 | February | City Hotel | 4950 |
| 7 | February | Resort Hotel | 3102 |
| 8 | January | City Hotel | 3730 |
| 9 | January | Resort Hotel | 2191 |
| 10 | July | City Hotel | 8071 |
| 11 | July | Resort Hotel | 4573 |
| 12 | June | City Hotel | 7885 |
| 13 | June | Resort Hotel | 3044 |
| 14 | March | City Hotel | 6434 |
| 15 | March | Resort Hotel | 3334 |
| 16 | May | City Hotel | 8221 |
| 17 | May | Resort Hotel | 3559 |
| 18 | November | City Hotel | 4336 |
| 19 | November | Resort Hotel | 2435 |
| 20 | October | City Hotel | 7594 |
| 21 | October | Resort Hotel | 3553 |
| 22 | September | City Hotel | 7392 |
| 23 | September | Resort Hotel | 3108 |
cancelled=cancelled.rename(columns={'is_canceled':'no of cancellations'})
cancelled
| arrival_date_month | hotel | no of cancellations | |
|---|---|---|---|
| 0 | April | City Hotel | 7469 |
| 1 | April | Resort Hotel | 3609 |
| 2 | August | City Hotel | 8967 |
| 3 | August | Resort Hotel | 4894 |
| 4 | December | City Hotel | 4114 |
| 5 | December | Resort Hotel | 2645 |
| 6 | February | City Hotel | 4950 |
| 7 | February | Resort Hotel | 3102 |
| 8 | January | City Hotel | 3730 |
| 9 | January | Resort Hotel | 2191 |
| 10 | July | City Hotel | 8071 |
| 11 | July | Resort Hotel | 4573 |
| 12 | June | City Hotel | 7885 |
| 13 | June | Resort Hotel | 3044 |
| 14 | March | City Hotel | 6434 |
| 15 | March | Resort Hotel | 3334 |
| 16 | May | City Hotel | 8221 |
| 17 | May | Resort Hotel | 3559 |
| 18 | November | City Hotel | 4336 |
| 19 | November | Resort Hotel | 2435 |
| 20 | October | City Hotel | 7594 |
| 21 | October | Resort Hotel | 3553 |
| 22 | September | City Hotel | 7392 |
| 23 | September | Resort Hotel | 3108 |
final=sd.Sort_Dataframeby_Month(cancelled,'arrival_date_month')
final
| arrival_date_month | hotel | no of cancellations | |
|---|---|---|---|
| 0 | January | City Hotel | 3730 |
| 1 | January | Resort Hotel | 2191 |
| 2 | February | City Hotel | 4950 |
| 3 | February | Resort Hotel | 3102 |
| 4 | March | City Hotel | 6434 |
| 5 | March | Resort Hotel | 3334 |
| 6 | April | City Hotel | 7469 |
| 7 | April | Resort Hotel | 3609 |
| 8 | May | City Hotel | 8221 |
| 9 | May | Resort Hotel | 3559 |
| 10 | June | City Hotel | 7885 |
| 11 | June | Resort Hotel | 3044 |
| 12 | July | City Hotel | 8071 |
| 13 | July | Resort Hotel | 4573 |
| 14 | August | City Hotel | 8967 |
| 15 | August | Resort Hotel | 4894 |
| 16 | September | City Hotel | 7392 |
| 17 | September | Resort Hotel | 3108 |
| 18 | October | City Hotel | 7594 |
| 19 | October | Resort Hotel | 3553 |
| 20 | November | City Hotel | 4336 |
| 21 | November | Resort Hotel | 2435 |
| 22 | December | City Hotel | 4114 |
| 23 | December | Resort Hotel | 2645 |
plt.figure(figsize=(12, 8))
sns.barplot(x = "arrival_date_month", y = "no of cancellations" , hue="hotel",
hue_order = ["City Hotel", "Resort Hotel"], data=final)
plt.show()
For the City hotel the relative number of cancelations is around 40 % throughout the year.
For the Resort hotel it is highest in the summer and lowest during the winter.